Load data

Load the Differentially Expression Analysis results obtained by Miquel.

DEG<-read.csv("Data/DEA_results.csv")
names(DEG)[names(DEG) == "X"] <-"transcript_id"
# head(DEG)
# dim(DEG)

Load the transcriptome annotations

Raw_Annots<-read.csv("Data/Annotated_Transcriptome_summarisedbyTranscript_V5.tsv", sep="\t")## File toobig for GitHub. Contact me if you need it.
#dim(Raw_Annots)
#Raw_Annots

Join DEG list and Annotatiosn

DEG_Annots<-DEG %>%
  left_join(., Raw_Annots, by=("transcript_id" ) )

#DEG_Annots

PCA all genes

Table_vst<-read.csv("Data/Table_vst.csv",  row.names = 1)

Design_matrix<-data.frame(Sample_names=colnames(Table_vst), Biological_condition=sapply(strsplit(colnames(Table_vst),"_rep"), `[`, 1))
rownames(Design_matrix)<-Design_matrix$Sample_names
# Do PCA
PCA <- prcomp(t(as.data.frame(Table_vst)))
# Extract PC axes for plotting
PCAvalues <- data.frame(samples=sapply(strsplit(colnames(Table_vst),"_rep"), `[`, 1), PCA$x)
#summary(PCA)


# Plot
ggplot(PCAvalues, aes(x = PC1, y = PC2, colour = samples)) +
    geom_point(size = 3)+
    xlab(paste0("PC1: ",round(summary(PCA)$importance[2,"PC1"]*100,2),"% variance")) +
   ylab(paste0("PC2: ",round(summary(PCA)$importance[2,"PC2"]*100,2),"% variance")) +
    geom_text_repel(label = rownames(PCAvalues), size=3)+
  ggtitle("PCA - All genes - VST")

PCA with DEA genes only

List_DEA_genes<-DEG[which(DEG$padj<0.01),"transcript_id"]
#length(List_DEA_genes)

DEA_genes_vst<-Table_vst[which(rownames(Table_vst) %in% List_DEA_genes),]
#dim(DEA_genes_vst) 

# Do PCA
PCA_DEgenes <- prcomp(t(as.data.frame(DEA_genes_vst)))
# Extract PC axes for plotting
#PCAvalues_DEgenes <- data.frame(samples=sapply(strsplit(colnames(DEA_genes_vst),"_rep"), `[`, 1), PCA$x)

PCAvalues_DEgenes <- data.frame(samples=sapply(strsplit(colnames(DEA_genes_vst),"_rep"), `[`, 1), PCA_DEgenes$x)
#summary(PCAvalues_DEgenes)


# Plot
ggplot(PCAvalues_DEgenes, aes(x = PC1, y = PC2, colour = samples)) +
    geom_point(size = 3)+
    xlab(paste0("PC1: ",round(summary(PCA_DEgenes)$importance[2,"PC1"]*100,2),"% variance")) +
   ylab(paste0("PC2: ",round(summary(PCA_DEgenes)$importance[2,"PC2"]*100,2),"% variance")) +
  geom_text_repel(label = rownames(PCAvalues_DEgenes), size=3)+
  ggtitle("PCA - All genes - VST")

Create an Organism Packages with AnnotationForge.

library(AnnotationForge)

Gene_Table<-data.frame(GID=DEG_Annots$transcript_id,
                       BlastX=DEG_Annots$sprot_Top_BLASTX_hit,
                       BlastP=DEG_Annots$sprot_Top_BLASTP_hit,
                       BlastX_Name=DEG_Annots$BlastX_ProtName,
                       BlastP_Name=DEG_Annots$BlastP_ProtName,
                       BlastSppHit=DEG_Annots$SPECIES)

GoTable<-DEG_Annots %>%
  dplyr::select("transcript_id","PfamBlastGos_UNIQ_clean") %>%
  separate_rows(. ,"PfamBlastGos_UNIQ_clean", sep=";") %>%
  filter(!is.na(PfamBlastGos_UNIQ_clean)) %>%
  filter(!str_detect(PfamBlastGos_UNIQ_clean, '\\.')) %>%
  dplyr::rename(GID="transcript_id" , GO="PfamBlastGos_UNIQ_clean") %>%
  tibble::add_column(EVIDENCE="IEA") %>%
  as.data.frame()


## Too big for GitHub, contact me if you need the package.
# makeOrgPackage(gene_info=Gene_Table, go=GoTable,
#                       version = "0.1",
#                      maintainer="Some One <so@someplace.org>",
#                      author="Some One <so@someplace.org>",
#                        outputDir = "/home/ysland/Documents/Paracletus_project/RNA_seq_data/",
#                        tax_id = "223034.",
#                        genus = "Paracletus",
#                        species = "cimiciformis",
#                        goTable="go")
# 
# 
# ## then you can call install.packages based on the return value

Gene Ontology Enroichment Analysis on DE genes by Morf

library("org.Pcimiciformis.eg.db")

table(DEG[which(DEG$padj<0.01),]$log2FoldChange>0)
## 
## FALSE  TRUE 
##   576   511

DEA p-value <0.01

library(org.Pcimiciformis.eg.db)



GENESList<-list(
  DEA_genes_Up_FM=DEG[which(DEG$padj<0.01 & DEG$log2FoldChange>0),"transcript_id"],
  DEA_genes_Up_RM=DEG[which(DEG$padj<0.01 & DEG$log2FoldChange<0),"transcript_id"]
)


table(GENESList$DEA_genes_Up_FM %in% GoTable$GID)
table(GENESList$DEA_genes_Up_RM %in% GoTable$GID)

DE_byMorf_BP<- compareCluster(geneCluster = GENESList,
                      OrgDb= org.Pcimiciformis.eg.db, 
                      keyType = 'GID',
                      fun = "enrichGO",
                      ont  = "BP",
                      pvalueCutoff  = 0.05,
                      pAdjustMethod= "BH", 
                      qvalueCutoff  = 0.05,
                      readable=FALSE)


DE_byMorf_MF<- compareCluster(geneCluster = GENESList,
                      OrgDb= org.Pcimiciformis.eg.db, 
                      keyType = 'GID',
                      fun = "enrichGO",
                      ont  = "MF",
                      pvalueCutoff  = 0.05,
                      pAdjustMethod= "BH", 
                      qvalueCutoff  = 0.05,
                      readable=FALSE)

DE_byMorf_CC<- compareCluster(geneCluster = GENESList,
                      OrgDb= org.Pcimiciformis.eg.db, 
                      keyType = 'GID',
                      fun = "enrichGO",
                      ont  = "CC",
                      pvalueCutoff  = 0.05,
                      pAdjustMethod= "BH", 
                      qvalueCutoff  = 0.05,
                      readable=FALSE)


#annotationDbi::select(org.Pcimiciformis.eg.db,keys=DE_genes_Groups_long$Gene,columns=c("ENTREZID", "SYMBOL","GENENAME","FLYBASECG" ),keytype="FLYBASE")

# write.csv(DE_byMorf_BP, file = "DE_p001_byMorf_BP.csv")
# write.csv(DE_byMorf_MF, file = "DE_p001_byMorf_MF.csv")
# write.csv(DE_byMorf_CC, file = "DE_p001_byMorf_CC.csv")

Biological Process

dotplot(DE_byMorf_BP , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01)  -- BP ")

 emapplot(DE_byMorf_BP,layout="nicely")+ggtitle("Significant GO terms enrich in DE genes (p<0.01)  -- BP ")

Restrict to Level 5

DE_byMorf_BP_Simplified_level<-gofilter(DE_byMorf_BP, level=6) 
dotplot(DE_byMorf_BP_Simplified_level , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01)  -- BP -- Level 6")

Example: trehalose
GO0005991<-DE_byMorf_BP_Simplified_level@compareClusterResult[which(DE_byMorf_BP_Simplified_level@compareClusterResult$Description=="trehalose metabolic process"),]
GO0005991
GO0005991_list<-list(
  Up_FM=unlist(strsplit(GO0005991[which(GO0005991$Cluster=="DEA_genes_Up_FM"),"geneID"], "/")),
  Up_RM=unlist(strsplit(GO0005991[which(GO0005991$Cluster=="DEA_genes_Up_RM"),"geneID"], "/"))
)


VennDiagram::venn.diagram(GO0005991_list,
                            category.names =names(GO0005991_list),
                          output=TRUE,
                        filename = "Venn_GO0005991.png" ,
                        imagetype="png",
                        # Set names
                      cat.cex = 0.6,
                     fill =  viridis(2))
## [1] 1
GO0005991_list
## $Up_FM
## [1] "TRINITY_DN73_c0_g1_i5"  "TRINITY_DN73_c0_g1_i12" "TRINITY_DN73_c0_g1_i1" 
## [4] "TRINITY_DN73_c0_g1_i13" "TRINITY_DN73_c0_g1_i18"
## 
## $Up_RM
##  [1] "TRINITY_DN73_c0_g2_i1"    "TRINITY_DN7791_c0_g1_i3" 
##  [3] "TRINITY_DN64519_c0_g1_i1" "TRINITY_DN99_c1_g1_i8"   
##  [5] "TRINITY_DN7791_c0_g1_i13" "TRINITY_DN73_c0_g1_i20"  
##  [7] "TRINITY_DN1794_c3_g5_i1"  "TRINITY_DN99_c1_g1_i15"  
##  [9] "TRINITY_DN99_c1_g1_i6"    "TRINITY_DN1794_c3_g1_i1" 
## [11] "TRINITY_DN7791_c0_g1_i12" "TRINITY_DN73_c0_g2_i2"
venn

venn

Simplified

#The simplify method apply ‘select_fun’ (which can be a user defined function) to feature ‘by’ to select one representative terms from redundant terms (which have similarity higher than ‘cutoff').
DE_byMorf_BP_Simplified <- clusterProfiler::simplify(DE_byMorf_BP, cutoff=0.7, by="p.adjust", select_fun=min)
dotplot(DE_byMorf_BP_Simplified , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01)  -- BP -- Simplified")

Molecular Function

dotplot(DE_byMorf_MF , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01)  -- MF ")

 emapplot(DE_byMorf_MF,layout="nicely")+ggtitle("Significant GO terms enrich in DE genes (p<0.01)  -- MF ")

Cellular Component

dotplot(DE_byMorf_CC , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.01)  -- CC ")

DEA p-value <0.05

 table(DEG[which(DEG$padj<0.05),]$log2FoldChange>0)
## 
## FALSE  TRUE 
##   844   796
GENESList_p005<-list(
  DEA_genes_Up_FM=DEG[which(DEG$padj<0.05 & DEG$log2FoldChange>0),"transcript_id"],
  DEA_genes_Up_RM=DEG[which(DEG$padj<0.05 & DEG$log2FoldChange<0),"transcript_id"]
)
DE_byMorf_BP_p005<- compareCluster(geneCluster = GENESList_p005,
                      OrgDb= org.Pcimiciformis.eg.db, 
                      keyType = 'GID',
                      fun = "enrichGO",
                      ont  = "BP",
                      pvalueCutoff  = 0.05,
                      pAdjustMethod= "BH", 
                      qvalueCutoff  = 0.05,
                      readable=FALSE)

DE_byMorf_MF_p005<- compareCluster(geneCluster = GENESList_p005,
                      OrgDb= org.Pcimiciformis.eg.db, 
                      keyType = 'GID',
                      fun = "enrichGO",
                      ont  = "MF",
                      pvalueCutoff  = 0.05,
                      pAdjustMethod= "BH", 
                      qvalueCutoff  = 0.05,
                      readable=FALSE)

DE_byMorf_CC_p005<- compareCluster(geneCluster = GENESList_p005,
                      OrgDb= org.Pcimiciformis.eg.db, 
                      keyType = 'GID',
                      fun = "enrichGO",
                      ont  = "CC",
                      pvalueCutoff  = 0.05,
                      pAdjustMethod= "BH", 
                      qvalueCutoff  = 0.05,
                      readable=FALSE)

Biological Process

dotplot(DE_byMorf_BP_p005 , showCategory=100, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.05)  -- BP ")

 emapplot(DE_byMorf_BP_p005,layout="nicely")+ggtitle("Significant GO terms enrich in DE genes (p<0.05)  -- BP ")

Simplified

#The simplify method apply ‘select_fun’ (which can be a user defined function) to feature ‘by’ to select one representative terms from redundant terms (which have similarity higher than ‘cutoff').
DE_byMorf_BP_p005_Simplified <- clusterProfiler::simplify(DE_byMorf_BP_p005, cutoff=0.7, by="p.adjust", select_fun=min)
dotplot(DE_byMorf_BP_p005_Simplified , showCategory=50, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.05)  -- BP -- Simplified")

Molecular Function

dotplot(DE_byMorf_MF_p005 , showCategory=100, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.05)  -- MF ")

 emapplot(DE_byMorf_MF_p005,layout="nicely")+ggtitle("Significant GO terms enrich in DE genes (p<0.05)  -- MF ")

GO0042302<-DE_byMorf_MF_p005@compareClusterResult[which(DE_byMorf_MF_p005@compareClusterResult$Description=="structural constituent of cuticle"),]
GO0042302
GO0042302_list<-list(
  Up_FM=unlist(strsplit(GO0042302[which(GO0042302$Cluster=="DEA_genes_Up_FM"),"geneID"], "/")),
  Up_RM=unlist(strsplit(GO0042302[which(GO0042302$Cluster=="DEA_genes_Up_RM"),"geneID"], "/"))
)

# 
# VennDiagram::venn.diagram(GO0042302_list,
#                             category.names =names(GO0042302_list),
#                           output=TRUE,
#                         filename = "Venn_GO0042302.png" ,
#                         imagetype="png",
#                         # Set names
#                       cat.cex = 0.6,
#                      fill =  viridis(2))

GO0042302_list
## $Up_FM
##  [1] "TRINITY_DN24_c2_g2_i12"   "TRINITY_DN1765_c0_g1_i17"
##  [3] "TRINITY_DN174_c0_g1_i3"   "TRINITY_DN621_c2_g1_i2"  
##  [5] "TRINITY_DN6368_c0_g1_i7"  "TRINITY_DN174_c0_g1_i1"  
##  [7] "TRINITY_DN1765_c0_g1_i11" "TRINITY_DN24_c2_g2_i7"   
##  [9] "TRINITY_DN351_c0_g1_i36"  "TRINITY_DN411_c3_g1_i19" 
## [11] "TRINITY_DN11230_c0_g2_i1" "TRINITY_DN3670_c0_g1_i2" 
## [13] "TRINITY_DN1765_c0_g1_i27" "TRINITY_DN1765_c0_g3_i1" 
## [15] "TRINITY_DN24_c2_g5_i1"    "TRINITY_DN411_c3_g1_i18" 
## [17] "TRINITY_DN9298_c0_g1_i1"  "TRINITY_DN6368_c1_g1_i7" 
## [19] "TRINITY_DN411_c3_g1_i21"  "TRINITY_DN1765_c0_g1_i31"
## [21] "TRINITY_DN3670_c0_g1_i1"  "TRINITY_DN29616_c0_g1_i1"
## [23] "TRINITY_DN45166_c0_g1_i3"
## 
## $Up_RM
## NULL
venn

venn

Cellular Component

dotplot(DE_byMorf_CC_p005 , showCategory=100, includeAll=TRUE)+ggtitle("Significant GO terms enrich in DE genes (p<0.05)  -- CC ")

Session info

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.10
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] stats4    parallel  grid      stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] org.Pcimiciformis.eg.db_0.1 AnnotationForge_1.30.1     
##  [3] AnnotationDbi_1.50.1        IRanges_2.22.2             
##  [5] S4Vectors_0.26.1            Biobase_2.48.0             
##  [7] BiocGenerics_0.34.0         stringr_1.4.0              
##  [9] enrichplot_1.8.1            clusterProfiler_3.16.0     
## [11] ComplexHeatmap_2.5.3        RColorBrewer_1.1-2         
## [13] forcats_0.5.0               ggrepel_0.8.2              
## [15] ggplot2_3.3.2               viridis_0.5.1              
## [17] viridisLite_0.3.0           plyr_1.8.6                 
## [19] tidyr_1.1.0                 dplyr_1.0.2                
## 
## loaded via a namespace (and not attached):
##  [1] fgsea_1.14.0         colorspace_1.4-1     rjson_0.2.20        
##  [4] ellipsis_0.3.1       ggridges_0.5.2       circlize_0.4.10     
##  [7] qvalue_2.20.0        futile.logger_1.4.3  GlobalOptions_0.1.2 
## [10] clue_0.3-57          farver_2.0.3         urltools_1.7.3      
## [13] graphlayouts_0.7.0   bit64_0.9-7.1        scatterpie_0.1.4    
## [16] xml2_1.3.2           codetools_0.2-17     splines_4.0.3       
## [19] GOSemSim_2.14.0      knitr_1.29           polyclip_1.10-0     
## [22] jsonlite_1.7.0       cluster_2.1.0        GO.db_3.11.4        
## [25] png_0.1-7            ggforce_0.3.2        BiocManager_1.30.10 
## [28] compiler_4.0.3       httr_1.4.2           rvcheck_0.1.8       
## [31] Matrix_1.2-18        formatR_1.7          tweenr_1.0.1        
## [34] htmltools_0.5.0      prettyunits_1.1.1    tools_4.0.3         
## [37] igraph_1.2.5         gtable_0.3.0         glue_1.4.2          
## [40] reshape2_1.4.4       DO.db_2.9            fastmatch_1.1-0     
## [43] Rcpp_1.0.5           vctrs_0.3.4          ggraph_2.0.3        
## [46] xfun_0.15            lifecycle_0.2.0      XML_3.99-0.5        
## [49] DOSE_3.14.0          europepmc_0.4        MASS_7.3-53         
## [52] scales_1.1.1         tidygraph_1.2.0      hms_0.5.3           
## [55] lambda.r_1.2.4       yaml_2.2.1           memoise_1.1.0       
## [58] gridExtra_2.3        downloader_0.4       triebeard_0.3.0     
## [61] stringi_1.4.6        RSQLite_2.2.0        BiocParallel_1.22.0 
## [64] shape_1.4.4          rlang_0.4.7          pkgconfig_2.0.3     
## [67] bitops_1.0-6         evaluate_0.14        lattice_0.20-41     
## [70] purrr_0.3.4          labeling_0.3         cowplot_1.0.0       
## [73] bit_1.1-15.2         tidyselect_1.1.0     magrittr_1.5        
## [76] R6_2.4.1             generics_0.0.2       DBI_1.1.0           
## [79] pillar_1.4.6         withr_2.2.0          RCurl_1.98-1.2      
## [82] tibble_3.0.3         crayon_1.3.4         futile.options_1.0.1
## [85] rmarkdown_2.3        GetoptLong_1.0.2     progress_1.2.2      
## [88] data.table_1.13.0    blob_1.2.1           digest_0.6.25       
## [91] VennDiagram_1.6.20   gridGraphics_0.5-0   munsell_0.5.0       
## [94] ggplotify_0.0.5